/* 
[] brackets indicate redacted internal census variable name or directory that can't be disclosed. 

Create the plant capacity within X miles of each pik address in each year. 
Using plants centroids from plants_csm_2022.sas7bdat and shapefiles from usmile200p.sas7bdat.

- Do proc ginside for each state and year for a 100 mile buffer around a plant
- keep piks within the buffer, then aggregate the plant capacity around the pik residence locations
- in order to aggregate, use geodist() which is a Cartesian product match - huge. 
- need to narrow down to the smallest set of piks so that geodist() is feasible.
- this is the reason for cutting to county-year or county-2-digit-pik-year. 

- loop over state, year, county
- calculate geodists, sum capacities, and append data
- 100-mile to 20-mile buffer.
*/ 

* define libraries;
libname ben '/projects/users/########/Snapshot2022/IntermediateData';
libname hantmp '/projects/users/########/Snapshot2022/IntermediateData/TempData';
libname shpf '/projects/users/########/Snapshot2022/IntermediateData/Shapefiles';

/*
STEPS: 
- Get piks and lat/lon for workers in year/state from ehf_icf_spine
  - has x, y, and [county id]
- PROC GINSIDE to flag inside the 100-mile buffer
- DATA step to drop workers outside of buffer
- get total wind capacity in the distance bands from each pik BY LOOPING THROUGH COUNTIES/2-digit piks
- append to previous state/year/county etc.
ONCE all are appended: 
- then merge with EHF workers - ehf_icf_spine, 
      merge the wind capacity to worker dataset, 
      using SQL LEFT JOIN from hannah.ehf_icf_spine to this dataset.
*/

* define macro "statecounty_piks_year_caps" which does 100-mile buffer for certain years, states by county;
%macro statecounty_piks_year_caps;
  %let stfip = 01 04 05 06 08 10 11 17 18 19 20 23 24 29 30 31 32 35 38 40 47 51 56;
  %local I next_fip;
  %let I = 1;
  %do %while (%scan(&stfip, &I) ne );
    %let next_fip = %scan(&stfip, &I);
      %do yr = 2000 %to 2021;
	*GET WORKER PIKS FOR A STATE/YEAR;
	*Create temp file of all piks residences in current state and year;
	DATA hantmp.icf_&next_fip._&yr;
	  SET ben.ehf_icf_spine (KEEP = [county id] pik year y x);
	  IF SUBSTR([county id],1,2) = "&next_fip" AND year = &yr;
	RUN;

	*Flag observations inside the buffer;
	PROC GINSIDE 
	  DATA	= hantmp.icf_&next_fip._&yr
	  MAP 	= shpf.USmile100p
	  OUT	= hantmp.in_100m_&next_fip._&yr;
	  ID _ID_;
	RUN;
	data hantmp.in_100m_&next_fip._&yr;
	  set hantmp.in_100m_&next_fip._&yr (keep = x y _ID_ pik year [county id]);
	  IF CMISS(_ID_) THEN DELETE;
	  rename _ID_ = id100;
	  statef = &next_fip;
	run;

	* Now bring in plant data, make separate dataset just within the buffer;
	* Subset plant data to year and state (&yr, &next_fip);
	data hantmp.plnts_&yr;
	  set ben.plants_csm_2022;
	  IF p_year <= &yr;
	run;

	* Now loop through counties in state &next_fip;
	%let sqlobs=0;
	%let cnties=;
	proc sql;
	  select distinct [county id] 
	    into :cnties 
	    separated by ' ' 
	    from hantmp.in_100m_&next_fip._&yr where not missing([county id]);
	quit;
	%let my_count = &sqlobs;
	%if &my_count gt 0 %then %do;
	%do ct = 1 %to %sysfunc(countw(&cnties));
	  %let next_ct = %scan(&cnties,&ct,%str( ));
	    data hantmp.in_100m_&next_fip._&yr._&next_ct;
	      set hantmp.in_100m_&next_fip._&yr;
	      if [county id] = &next_ct;
	    run;
	    proc sql; 
	      CREATE TABLE hantmp.hh_windcapsum_&next_fip._&yr._&next_ct AS
	      SELECT 
		A.*
		,E.stfips
		,E.ylat
		,E.xlong
		,E.p_year
		,E.p_cap	AS plantcap
		,E.p_tnum	AS turbnum
		,GEODIST(A.y, A.x, E.ylat, E.xlong, 'M' ) AS dist_to_turb
	      FROM hantmp.in_100m_&next_fip._&yr._&next_ct AS a
		LEFT JOIN hantmp.plnts_&yr AS e ON statef
	      WHERE CALCULATED dist_to_turb <= 100
		AND CALCULATED dist_to_turb ^=.
	      ORDER BY A.pik, A.year; 
	    quit;
	    proc sql;
	      CREATE TABLE hantmp.hh_wcapsum_&next_fip._&yr._&next_ct AS
	      SELECT pik, year, 
	      SUM(plantcap) as plantcap100,
	      SUM(case when dist_to_turb<=80 then plantcap else 0 end) as plantcap80,
	      SUM(case when dist_to_turb<=60 then plantcap else 0 end) as plantcap60,
	      SUM(case when dist_to_turb<=40 then plantcap else 0 end) as plantcap40,
	      SUM(case when dist_to_turb<=20 then plantcap else 0 end) as plantcap20,
	      MAX([county id]) as [county id],
	      MAX(statef) as statef,
	      MAX(y) as y,
	      MAX(x) as x
	      FROM hantmp.hh_windcapsum_&next_fip._&yr._&next_ct
	      GROUP BY pik, year
	      ORDER BY pik, year;
	    quit;
	    PROC APPEND base = ben.all_styr_100p 
	      data = hantmp.hh_wcapsum_&next_fip._&yr._&next_ct;
	    RUN;
	    PROC DATASETS library = hantmp nolist;
	      delete in_100m_&next_fip._&yr._&next_ct hh_windcapsum_&next_fip._&yr._&next_ct hh_wcapsum_&next_fip._&yr._&next_ct;
	    RUN; QUIT;
	  %end;
	 
	PROC DATASETS library = hantmp nolist;
	  delete icf_&next_fip._&yr plnts_&yr ;
	RUN; QUIT;
	PROC DATASETS library = hantmp nolist;
	  delete in_100m_&next_fip._&yr;
	RUN; QUIT;
      %end;
    %end;
    %let I = %eval(&I + 1);
  %end;
%mend;


* remove any old copies of appended "base" dataset before running macro;
PROC DATASETS library = ben nolist;
  delete all_styr_100p;
RUN; QUIT;

%statecounty_piks_year_caps;






